import re

import pandas as pd
from pandas import DataFrame
from sqlalchemy import create_engine


def washData():
    df: DataFrame = pd.read_excel('video_data1.xlsx')

    pattern = re.compile(r'入站必刷|全站排行榜最高第?\d*名?')

    for i in range(1, len(df)):
        if pattern.match(str(df.at[i, 'must_see'])):
            df.at[i, 'must_see'] = df.at[i - 1, 'must_see']

    # 删除包含空值的行
    df.dropna(subset=['must_see'], inplace=True)
    df['must_see'] = df['must_see'].apply(lambda x: int(str(x).replace('第', '').replace('期每周必看', '')))
    df['view_counts'] = df['view_counts'].apply(washViews)
    df['danmu'] = df['danmu'].apply(washNumber)
    df['video_time'] = pd.to_datetime(df['video_time'])
    df['like'] = df['like'].apply(washNumber)
    df['insert_coins'] = df['insert_coins'].apply(washNumber)
    df['collect'] = df['collect'].apply(washNumber)
    df['transmit'] = df['transmit'].apply(washNumber)
    df['attention'] = df['attention'].apply(lambda x: float(str(x).replace('关注 ', '').replace('万', '')) * 10000)
    df = df.drop(columns=['Unnamed: 0'])

    # 存储到 MySQL
    # df.index.name = 'id'
    engine = create_engine('mysql+pymysql://root:123456@localhost:3306/bilibili?charset=utf8mb4')
    df.to_sql('video_data', con=engine, if_exists='replace', index_label='id')
    # df.to_excel("wash_video_data.xlsx")


def washViews(x):
    if '万' in x:
        return float(str(x).replace('万', '')) * 10000
    elif '亿' in x:
        return float(str(x).replace('亿', '')) * 100000000
    return float(x)


def washNumber(x):
    if '万' in x:
        return float(str(x).replace('万', '')) * 10000
    return float(x)


if __name__ == '__main__':
    washData()
